/*******************************************************************************
 Copyright (c) 2021-2023 Arm  Corporation All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
   * Neither the name of Intel Corporation nor the names of its contributors
     may be used to endorse or promote products derived from this software
     without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/

/*-----------------------------------------------------------------------
* zuc_aarch64_top.c
*-----------------------------------------------------------------------
* An implementation of ZUC, the core algorithm for the
* 3GPP Confidentiality and Integrity algorithms.
*
*-----------------------------------------------------------------------*/

/*
 * Default symbol mapping.
 *
 * Unless ZUC_EEA3_1_BUFFER has already been defined before this point, the
 * generic upper-case names used throughout this file are bound to their
 * *_aarch64 counterparts below. Defining ZUC_EEA3_1_BUFFER beforehand skips
 * this whole group, in which case every name below must be supplied by
 * whoever defined it.
 */
#ifndef ZUC_EEA3_1_BUFFER

/* Names of the EEA3 / EIA3 buffer-processing functions */
#define ZUC_EEA3_1_BUFFER           zuc_eea3_1_buffer_aarch64
#define ZUC_EEA3_4_BUFFER           zuc_eea3_4_buffer_aarch64
#define ZUC_EEA3_N_BUFFER           zuc_eea3_n_buffer_aarch64
#define ZUC256_EEA3_1_BUFFER        zuc256_eea3_1_buffer_aarch64
#define ZUC256_EEA3_N_BUFFER        zuc256_eea3_n_buffer_aarch64
#define ZUC_EIA3_1_BUFFER           zuc_eia3_1_buffer_aarch64
#define ZUC_EIA3_4_BUFFER           zuc_eia3_4_buffer_aarch64
#define ZUC_EIA3_N_BUFFER           zuc_eia3_n_buffer_aarch64
#define ZUC_EIA3_4_BUFFER_JOB       zuc_eia3_4_buffer_job_aarch64
#define ZUC256_EIA3_1_BUFFER        zuc256_eia3_1_buffer_aarch64
#define ZUC256_EIA3_N_BUFFER        zuc256_eia3_n_buffer_aarch64
#define ZUC256_EIA3_4_BUFFER_JOB    zuc256_eia3_4_buffer_job_aarch64

/* Names of the asm_* low-level routines called from this file
 * (not defined here; presumably declared in include/zuc_internal.h) */
#define ASM_ZUC_INITIALIZATION      asm_ZucInitialization_aarch64
#define ASM_ZUC_INITIALIZATION_4    asm_ZucInitialization_4_aarch64
#define ASM_ZUC_GEN_KEYSTREAM4B     asm_ZucGenKeystream4B_aarch64
#define ASM_ZUC_GEN_KEYSTREAM8B     asm_ZucGenKeystream8B_aarch64
#define ASM_ZUC_GEN_KEYSTREAM16B    asm_ZucGenKeystream16B_aarch64
#define ASM_ZUC_GEN_KEYSTREAM4B_4   asm_ZucGenKeystream4B_4_aarch64
#define ASM_ZUC_GEN_KEYSTREAM8B_4   asm_ZucGenKeystream8B_4_aarch64
#define ASM_ZUC_GEN_KEYSTREAM16B_4  asm_ZucGenKeystream16B_4_aarch64
#define ASM_ZUC_GEN_KEYSTREAM       asm_ZucGenKeystream_aarch64
#define ASM_ZUC_CIPHER_4            asm_ZucCipher_4_aarch64
#define ASM_XOR_KEYSTREAM16B        asm_XorKeyStream16B_aarch64
#define ASM_EIA3_ROUND16B           asm_Eia3Round16B_aarch64
#define ASM_EIA3_REMAINDER          asm_Eia3Remainder_aarch64
#define ASM_ZUC_AUTH_4              asm_ZucAuth_4_aarch64
#define ASM_ZUC256_INITIALIZATION   asm_Zuc256Initialization_aarch64
#define ASM_ZUC256_INITIALIZATION_4 asm_Zuc256Initialization_4_aarch64
#define ASM_ZUC256_AUTH_4           asm_Zuc256Auth_4_aarch64

#endif /* !ZUC_EEA3_1_BUFFER */

#include "include/zuc_internal.h"
#include "ipsec-mb.h"
#include "clear_regs_mem_aarch64.h"
#include <stdbool.h>
#include "include/error.h"

/* Number of buffers handled together by the 4-buffer routines in this file */
#define NUM_BUFS 4
/* Size in bytes of the keystream scratch buffers; one full round of
 * ASM_ZUC_GEN_KEYSTREAM16B / ASM_XOR_KEYSTREAM16B covers this many bytes */
#define KEYSTR_ROUND_LEN 16

/*
 * Unpack 6 input bytes (8 * 6 = 48 bits) into 8 output bytes, 6 bits each.
 *
 * The input is interpreted as one big-endian 48-bit value. pOutput[0]
 * receives its 6 most significant bits and pOutput[7] its 6 least
 * significant bits; every output byte holds its value in the low 6 bits
 * (the top two bits are always zero).
 *
 * pOutput  destination, at least 8 bytes
 * pInput   source, at least 6 bytes
 */
static void expand_from_6_to_8_bytes(uint8_t *pOutput, const uint8_t *pInput)
{
        uint64_t bits48 = 0;
        int i;

        /*
         * Build the 48-bit value with shifts instead of casting a byte array
         * to uint64_t: no aliasing or alignment assumptions are needed, and
         * the result does not depend on the host byte order.
         */
        for (i = 0; i < 6; i++)
                bits48 = (bits48 << 8) | pInput[i];

        /* Peel off 6-bit groups, most significant group first */
        for (i = 0; i < 8; i++)
                pOutput[i] = (uint8_t)((bits48 >> (42 - 6 * i)) & 0x3f);
}

static inline
void _zuc_eea3_1_buffer(const void *pKey,
                        const void *pIv,
                        const void *pBufferIn,
                        void *pBufferOut,
                        const uint32_t length,
                        const uint32_t key_size)
{
        DECLARE_ALIGNED(ZucState_t zucState, 16);
        DECLARE_ALIGNED(uint8_t keyStream[KEYSTR_ROUND_LEN], 16);

        const uint64_t *pIn64 = NULL;
        uint64_t *pOut64 = NULL, *pKeyStream64 = NULL;
        uint64_t *pTemp64 = NULL, *pdstTemp64 = NULL;

        uint32_t numKeyStreamsPerPkt = length/ KEYSTR_ROUND_LEN;
        const uint32_t numBytesLeftOver = length % KEYSTR_ROUND_LEN;

        if (key_size == 256) {
                ASM_ZUC256_INITIALIZATION(pKey, pIv, &(zucState), 2);
        } else {
                ASM_ZUC_INITIALIZATION(pKey, pIv, &(zucState));
        }

        /* Loop over all the Quad-Words in input buffer and XOR with the 64bits
         * of generated keystream
         */
        pOut64 = (uint64_t *) pBufferOut;
        pIn64 = (const uint64_t *) pBufferIn;

        while (numKeyStreamsPerPkt--) {
                /* Generate the key stream 16 bytes at a time */
                ASM_ZUC_GEN_KEYSTREAM16B((uint32_t *) &keyStream[0], &zucState);

                /* XOR The Keystream generated with the input buffer here */
                pKeyStream64 = (uint64_t *)keyStream;
                ASM_XOR_KEYSTREAM16B(pIn64, pOut64, pKeyStream64);
                pIn64 += 2;
                pOut64 += 2;
        }

        /* Check for remaining 0 to 15 bytes */
        if(numBytesLeftOver) {
                /* buffer to store 16 bytes of keystream */
                DECLARE_ALIGNED(uint8_t tempSrc[KEYSTR_ROUND_LEN], 16);
                DECLARE_ALIGNED(uint8_t tempDst[KEYSTR_ROUND_LEN], 16);
                const uint8_t *pIn8 = (const uint8_t *) pBufferIn;
                uint8_t *pOut8 = (uint8_t *) pBufferOut;
                const uint64_t num4BRounds = ((numBytesLeftOver - 1) / 4) + 1;

                ASM_ZUC_GEN_KEYSTREAM((uint32_t *) &keyStream[0], &zucState, num4BRounds);

                /* copy the remaining bytes into temporary buffer and XOR with
                 * the 64-bytes of keystream. Then copy on the valid bytes back
                 * to the output buffer */
                memcpy(&tempSrc[0], &pIn8[length - numBytesLeftOver], numBytesLeftOver);
                pKeyStream64 = (uint64_t *) &keyStream[0];
                pTemp64 = (uint64_t *) &tempSrc[0];
                pdstTemp64 = (uint64_t *) &tempDst[0];

                ASM_XOR_KEYSTREAM16B(pTemp64, pdstTemp64, pKeyStream64);
                memcpy(&pOut8[length - numBytesLeftOver], &tempDst[0],
                        numBytesLeftOver);

#ifdef SAFE_DATA
                imb_clear_mem(tempSrc, sizeof(tempSrc));
                imb_clear_mem(tempDst, sizeof(tempDst));
#endif
        }
}

static inline
void _zuc_eea3_4_buffer(const void * const pKey[NUM_BUFS],
                        const uint8_t *ivs,
                        const void * const pBufferIn[NUM_BUFS],
                        void *pBufferOut[NUM_BUFS],
                        const uint32_t length[NUM_BUFS],
                        const uint32_t key_size)
{
        DECLARE_ALIGNED(ZucState4_t state, 64);
        DECLARE_ALIGNED(ZucState_t singlePktState, 64);
        unsigned int i;
        /* Calculate the minimum input packet size */
        uint32_t bytes1 = (length[0] < length[1] ?
                           length[0] : length[1]);
        uint32_t bytes2 = (length[2] < length[3] ?
                           length[2] : length[3]);
        /* min number of bytes */
        uint32_t bytes = (bytes1 < bytes2) ? bytes1 : bytes2;
        uint32_t numKeyStreamsPerPkt;
        DECLARE_ALIGNED(uint32_t remainBytes[NUM_BUFS], 16) = {0};
        DECLARE_ALIGNED(uint8_t keyStr[NUM_BUFS][KEYSTR_ROUND_LEN], 64);
        /* structure to store the 4 keys */
        DECLARE_ALIGNED(ZucKey4_t keys, 64);
        uint32_t numBytesLeftOver = 0;
        const uint8_t *pTempBufInPtr = NULL;
        uint8_t *pTempBufOutPtr = NULL;
        DECLARE_ALIGNED(const uint64_t *pIn64[NUM_BUFS], 64) = {NULL};
        DECLARE_ALIGNED(uint64_t *pOut64[NUM_BUFS], 64) = {NULL};
        uint64_t *pKeyStream64 = NULL;

        /*
         * Calculate the number of bytes left over for each packet,
         * and setup the Keys and IVs
         */
        for (i = 0; i < NUM_BUFS; i++) {
                remainBytes[i] = length[i];
                keys.pKeys[i] = pKey[i];
        }

        if (key_size == 256)
                ASM_ZUC256_INITIALIZATION_4(&keys, ivs, &state, 2);
        else
                ASM_ZUC_INITIALIZATION_4(&keys, ivs, &state);

        for (i = 0; i < NUM_BUFS; i++) {
                pOut64[i] = (uint64_t *) pBufferOut[i];
                pIn64[i] = (const uint64_t *) pBufferIn[i];
        }

        /* Encrypt common length of all buffers */
        ASM_ZUC_CIPHER_4(&state, pIn64, pOut64,
                         remainBytes, bytes);

        /* process each packet separately for the remaining bytes */
        for (i = 0; i < NUM_BUFS; i++) {
                if (remainBytes[i]) {
                        /* need to copy the zuc state to single packet state */
                        singlePktState.lfsrState[0] = state.lfsrState[0][i];
                        singlePktState.lfsrState[1] = state.lfsrState[1][i];
                        singlePktState.lfsrState[2] = state.lfsrState[2][i];
                        singlePktState.lfsrState[3] = state.lfsrState[3][i];
                        singlePktState.lfsrState[4] = state.lfsrState[4][i];
                        singlePktState.lfsrState[5] = state.lfsrState[5][i];
                        singlePktState.lfsrState[6] = state.lfsrState[6][i];
                        singlePktState.lfsrState[7] = state.lfsrState[7][i];
                        singlePktState.lfsrState[8] = state.lfsrState[8][i];
                        singlePktState.lfsrState[9] = state.lfsrState[9][i];
                        singlePktState.lfsrState[10] = state.lfsrState[10][i];
                        singlePktState.lfsrState[11] = state.lfsrState[11][i];
                        singlePktState.lfsrState[12] = state.lfsrState[12][i];
                        singlePktState.lfsrState[13] = state.lfsrState[13][i];
                        singlePktState.lfsrState[14] = state.lfsrState[14][i];
                        singlePktState.lfsrState[15] = state.lfsrState[15][i];

                        singlePktState.fR1 = state.fR1[i];
                        singlePktState.fR2 = state.fR2[i];

                        numKeyStreamsPerPkt = remainBytes[i] / KEYSTR_ROUND_LEN;
                        numBytesLeftOver = remainBytes[i]  % KEYSTR_ROUND_LEN;

                        pTempBufInPtr = pBufferIn[i];
                        pTempBufOutPtr = pBufferOut[i];

                        /* update the output and input pointers here to point
                         * to the i'th buffers */
                        pOut64[0] = (uint64_t *) &pTempBufOutPtr[length[i] -
                                                                remainBytes[i]];
                        pIn64[0] = (const uint64_t *) &pTempBufInPtr[length[i] -
                                                                remainBytes[i]];

                        while (numKeyStreamsPerPkt--) {
                                /* Generate the key stream 16 bytes at a time */
                                ASM_ZUC_GEN_KEYSTREAM16B((uint32_t *) keyStr[0],
                                                         &singlePktState);
                                pKeyStream64 = (uint64_t *) keyStr[0];
                                ASM_XOR_KEYSTREAM16B(pIn64[0],
                                                     pOut64[0],
                                                     pKeyStream64);
                                pIn64[0] += 2;
                                pOut64[0] += 2;
                        }

                        /* Check for remaining 0 to 15 bytes */
                        if (numBytesLeftOver) {
                                DECLARE_ALIGNED(uint8_t tempSrc[16], 64);
                                DECLARE_ALIGNED(uint8_t tempDst[16], 64);
                                uint64_t *pTempSrc64;
                                uint64_t *pTempDst64;
                                uint32_t offset = length[i] - numBytesLeftOver;
                                const uint64_t num4BRounds =
                                        ((numBytesLeftOver - 1) / 4) + 1;

                                ASM_ZUC_GEN_KEYSTREAM((uint32_t *)&keyStr[0],
                                                      &singlePktState,
                                                      num4BRounds);
                                /* copy the remaining bytes into temporary
                                 * buffer and XOR with the 16 bytes of
                                 * keystream. Then copy on the valid bytes back
                                 * to the output buffer */
                                memcpy(&tempSrc[0], &pTempBufInPtr[offset],
                                       numBytesLeftOver);
                                memset(&tempSrc[numBytesLeftOver], 0,
                                       16 - numBytesLeftOver);

                                pKeyStream64 = (uint64_t *) &keyStr[0][0];
                                pTempSrc64 = (uint64_t *) &tempSrc[0];
                                pTempDst64 = (uint64_t *) &tempDst[0];
                                ASM_XOR_KEYSTREAM16B(pTempSrc64,
                                                     pTempDst64,
                                                     pKeyStream64);

                                memcpy(&pTempBufOutPtr[offset],
                                       &tempDst[0], numBytesLeftOver);
#ifdef SAFE_DATA
                                imb_clear_mem(tempSrc, sizeof(tempSrc));
                                imb_clear_mem(tempDst, sizeof(tempDst));
#endif
                        }
                }
        }
#ifdef SAFE_DATA
        /* Clear sensitive data in stack */
        imb_clear_mem(keyStr, sizeof(keyStr));
        imb_clear_mem(&singlePktState, sizeof(singlePktState));
        imb_clear_mem(&state, sizeof(state));
        imb_clear_mem(&keys, sizeof(keys));
#endif
}

/*
 * XOR `numBuffers` buffers with ZUC keystream using 128-bit keys.
 *
 * Buffers are taken in groups of NUM_BUFS through _zuc_eea3_4_buffer; any
 * buffers left over after the last full group go through
 * _zuc_eea3_1_buffer one at a time. All arrays hold `numBuffers` entries.
 */
static inline
void _zuc_eea3_n_buffer(const void * const pKey[],
                        const void * const pIv[],
                        const void * const pBufferIn[],
                        void *pBufferOut[],
                        const uint32_t length[],
                        const uint32_t numBuffers)
{
        unsigned int first = 0;
        unsigned int remaining;

        /* Full groups of NUM_BUFS buffers */
        for (remaining = numBuffers; remaining >= NUM_BUFS;
             remaining -= NUM_BUFS, first += NUM_BUFS) {
                /* IVs of the group: 16 bytes copied at the start of each
                 * 32-byte slot */
                DECLARE_ALIGNED(uint8_t ivs[NUM_BUFS*32], 16);
                uint32_t lane;

                for (lane = 0; lane < NUM_BUFS; lane++)
                        memcpy(&ivs[lane * 32], pIv[first + lane], 16);

                _zuc_eea3_4_buffer(&pKey[first],
                                   ivs,
                                   &pBufferIn[first],
                                   &pBufferOut[first],
                                   &length[first],
                                   128);
        }

        /* Leftover buffers, one at a time */
        for (; remaining != 0; remaining--, first++) {
                _zuc_eea3_1_buffer(pKey[first],
                                   pIv[first],
                                   pBufferIn[first],
                                   pBufferOut[first],
                                   length[first],
                                   128);
        }
}

/*
 * EEA3 processing of a single buffer with a 128-bit key.
 *
 * With SAFE_PARAM defined, any pending imb_errno is reset first, then the
 * arguments are validated; on the first failed check the matching IMB_ERR_*
 * code is recorded through imb_set_errno() and the function returns without
 * touching the buffers.
 * With SAFE_DATA defined, scratch general-purpose and SIMD registers are
 * cleared before returning.
 */
void ZUC_EEA3_1_BUFFER(const void *pKey,
                       const void *pIv,
                       const void *pBufferIn,
                       void *pBufferOut,
                       const uint32_t length)
{
#ifdef SAFE_PARAM
        int err = 0;
        bool rejected = true;

        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* First failing check wins; order matches the error precedence */
        if (pKey == NULL)
                err = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                err = IMB_ERR_NULL_IV;
        else if (pBufferIn == NULL)
                err = IMB_ERR_NULL_SRC;
        else if (pBufferOut == NULL)
                err = IMB_ERR_NULL_DST;
        else if (length < ZUC_MIN_BYTELEN || length > ZUC_MAX_BYTELEN)
                err = IMB_ERR_CIPH_LEN; /* outside supported length range */
        else
                rejected = false;

        if (rejected) {
                imb_set_errno(NULL, err);
                return;
        }
#endif

        _zuc_eea3_1_buffer(pKey, pIv, pBufferIn, pBufferOut, length, 128);

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * EEA3 processing of NUM_BUFS buffers with 128-bit keys.
 *
 * With SAFE_PARAM defined, any pending imb_errno is reset first, then the
 * array pointers and every per-buffer entry are validated; on the first
 * failed check the matching IMB_ERR_* code is recorded through
 * imb_set_errno() and the function returns without touching the buffers.
 * With SAFE_DATA defined, scratch general-purpose and SIMD registers are
 * cleared before returning.
 */
void ZUC_EEA3_4_BUFFER(const void * const pKey[NUM_BUFS],
                       const void * const pIv[NUM_BUFS],
                       const void * const pBufferIn[NUM_BUFS],
                       void *pBufferOut[NUM_BUFS],
                       const uint32_t length[NUM_BUFS])
{
#ifdef SAFE_PARAM
        unsigned int i;
        int err = 0;
        bool rejected = true;

        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* Array pointers themselves */
        if (pKey == NULL)
                err = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                err = IMB_ERR_NULL_IV;
        else if (pBufferIn == NULL)
                err = IMB_ERR_NULL_SRC;
        else if (pBufferOut == NULL)
                err = IMB_ERR_NULL_DST;
        else if (length == NULL)
                err = IMB_ERR_CIPH_LEN;
        else
                rejected = false;

        /* Per-buffer entries; stops at the first failing buffer */
        for (i = 0; !rejected && i < NUM_BUFS; i++) {
                rejected = true;
                if (pKey[i] == NULL)
                        err = IMB_ERR_NULL_KEY;
                else if (pIv[i] == NULL)
                        err = IMB_ERR_NULL_IV;
                else if (pBufferIn[i] == NULL)
                        err = IMB_ERR_NULL_SRC;
                else if (pBufferOut[i] == NULL)
                        err = IMB_ERR_NULL_DST;
                else if (length[i] < ZUC_MIN_BYTELEN ||
                         length[i] > ZUC_MAX_BYTELEN)
                        err = IMB_ERR_CIPH_LEN; /* unsupported length */
                else
                        rejected = false;
        }

        if (rejected) {
                imb_set_errno(NULL, err);
                return;
        }
#endif

        /* IVs of the four buffers: 16 bytes copied at the start of each
         * 32-byte slot */
        DECLARE_ALIGNED(uint8_t ivs[NUM_BUFS*32], 16);
        uint32_t lane;

        for (lane = 0; lane < NUM_BUFS; lane++)
                memcpy(&ivs[lane * 32], pIv[lane], 16);

        _zuc_eea3_4_buffer(pKey, ivs, pBufferIn, pBufferOut, length, 128);

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * EEA3 processing of `numBuffers` buffers with 128-bit keys.
 *
 * With SAFE_PARAM defined, any pending imb_errno is reset first, then the
 * array pointers and the first `numBuffers` entries of each array are
 * validated; on the first failed check the matching IMB_ERR_* code is
 * recorded through imb_set_errno() and the function returns without
 * touching the buffers.
 * With SAFE_DATA defined, scratch general-purpose and SIMD registers are
 * cleared before returning.
 */
void ZUC_EEA3_N_BUFFER(const void * const pKey[],
                       const void * const pIv[],
                       const void * const pBufferIn[],
                       void *pBufferOut[],
                       const uint32_t length[],
                       const uint32_t numBuffers)
{
#ifdef SAFE_PARAM
        unsigned int i;
        int err = 0;
        bool rejected = true;

        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* Array pointers themselves */
        if (pKey == NULL)
                err = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                err = IMB_ERR_NULL_IV;
        else if (pBufferIn == NULL)
                err = IMB_ERR_NULL_SRC;
        else if (pBufferOut == NULL)
                err = IMB_ERR_NULL_DST;
        else if (length == NULL)
                err = IMB_ERR_CIPH_LEN;
        else
                rejected = false;

        /* Per-buffer entries; stops at the first failing buffer */
        for (i = 0; !rejected && i < numBuffers; i++) {
                rejected = true;
                if (pKey[i] == NULL)
                        err = IMB_ERR_NULL_KEY;
                else if (pIv[i] == NULL)
                        err = IMB_ERR_NULL_IV;
                else if (pBufferIn[i] == NULL)
                        err = IMB_ERR_NULL_SRC;
                else if (pBufferOut[i] == NULL)
                        err = IMB_ERR_NULL_DST;
                else if (length[i] < ZUC_MIN_BYTELEN ||
                         length[i] > ZUC_MAX_BYTELEN)
                        err = IMB_ERR_CIPH_LEN; /* unsupported length */
                else
                        rejected = false;
        }

        if (rejected) {
                imb_set_errno(NULL, err);
                return;
        }
#endif

        _zuc_eea3_n_buffer(pKey, pIv, pBufferIn, pBufferOut, length, numBuffers);

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * EEA3 processing of a single buffer with a 256-bit key.
 *
 * ivLen selects the IV format: a 25-byte IV is copied as is; otherwise the
 * first 17 bytes are copied and the following 6 bytes are expanded into
 * eight 6-bit values, so the core routine always receives 25 IV bytes.
 *
 * With SAFE_PARAM defined, any pending imb_errno is reset first, then the
 * arguments are validated (ivLen must be 23 or 25); on the first failed
 * check the matching IMB_ERR_* code is recorded through imb_set_errno() and
 * the function returns without touching the buffers.
 * With SAFE_DATA defined, scratch general-purpose and SIMD registers are
 * cleared before returning.
 */
void ZUC256_EEA3_1_BUFFER(const void *pKey,
                          const void *pIv,
                          const uint32_t ivLen,
                          const void *pBufferIn,
                          void *pBufferOut,
                          const uint32_t length)
{
#ifdef SAFE_PARAM
        int err = 0;
        bool rejected = true;

        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* First failing check wins; order matches the error precedence */
        if (pKey == NULL)
                err = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                err = IMB_ERR_NULL_IV;
        else if (pBufferIn == NULL)
                err = IMB_ERR_NULL_SRC;
        else if (pBufferOut == NULL)
                err = IMB_ERR_NULL_DST;
        else if (length < ZUC_MIN_BYTELEN || length > ZUC_MAX_BYTELEN)
                err = IMB_ERR_CIPH_LEN; /* outside supported length range */
        else if (ivLen != 23 && ivLen != 25)
                err = IMB_ERR_IV_LEN;
        else
                rejected = false;

        if (rejected) {
                imb_set_errno(NULL, err);
                return;
        }
#endif
        uint8_t iv[32];
        const uint8_t *ivBytes = (const uint8_t *) pIv;

        if (ivLen != 25) {
                /* 17 plain bytes, then 6 packed bytes expanded to 8 */
                memcpy(iv, ivBytes, 17);
                expand_from_6_to_8_bytes(&iv[17], &ivBytes[17]);
        } else {
                memcpy(iv, ivBytes, 25);
        }

        _zuc_eea3_1_buffer(pKey, iv, pBufferIn, pBufferOut, length, 256);

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * EEA3 processing of `numBuffers` buffers with 256-bit keys.
 *
 * Buffers are taken in groups of four through _zuc_eea3_4_buffer; buffers
 * left over after the last full group go through _zuc_eea3_1_buffer one at
 * a time. Each IV is staged into a 32-byte slot: a 25-byte IV is copied as
 * is; otherwise the first 17 bytes are copied and the following 6 bytes are
 * expanded into eight 6-bit values.
 *
 * With SAFE_PARAM defined, any pending imb_errno is reset first, then the
 * array pointers and the first `numBuffers` entries of each array are
 * validated (each ivLen must be 23 or 25); on the first failed check the
 * matching IMB_ERR_* code is recorded through imb_set_errno() and the
 * function returns without touching the buffers.
 * With SAFE_DATA defined, scratch general-purpose and SIMD registers are
 * cleared before returning.
 */
void ZUC256_EEA3_N_BUFFER(const void * const pKey[],
                          const void * const pIv[],
                          const uint32_t ivLen[],
                          const void * const pBufferIn[],
                          void *pBufferOut[],
                          const uint32_t length[],
                          const uint32_t numBuffers)
{
        unsigned int i, iv_idx;
        unsigned int packetCount = numBuffers;
        uint8_t ivs[32 * NUM_BUFS];

#ifdef SAFE_PARAM
        int err = 0;
        bool rejected = true;

        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* Array pointers themselves */
        if (pKey == NULL)
                err = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                err = IMB_ERR_NULL_IV;
        else if (ivLen == NULL)
                err = IMB_ERR_IV_LEN;
        else if (pBufferIn == NULL)
                err = IMB_ERR_NULL_SRC;
        else if (pBufferOut == NULL)
                err = IMB_ERR_NULL_DST;
        else if (length == NULL)
                err = IMB_ERR_CIPH_LEN;
        else
                rejected = false;

        /* Per-buffer entries; stops at the first failing buffer */
        for (i = 0; !rejected && i < numBuffers; i++) {
                rejected = true;
                if (pKey[i] == NULL)
                        err = IMB_ERR_NULL_KEY;
                else if (pIv[i] == NULL)
                        err = IMB_ERR_NULL_IV;
                else if (ivLen[i] != 23 && ivLen[i] != 25)
                        err = IMB_ERR_IV_LEN;
                else if (pBufferIn[i] == NULL)
                        err = IMB_ERR_NULL_SRC;
                else if (pBufferOut[i] == NULL)
                        err = IMB_ERR_NULL_DST;
                else if (length[i] < ZUC_MIN_BYTELEN ||
                         length[i] > ZUC_MAX_BYTELEN)
                        err = IMB_ERR_CIPH_LEN; /* unsupported length */
                else
                        rejected = false;
        }

        if (rejected) {
                imb_set_errno(NULL, err);
                return;
        }
#endif
        i = 0;

        /* Full groups of four buffers */
        for (; packetCount >= NUM_BUFS; packetCount -= NUM_BUFS, i += NUM_BUFS) {
                for (iv_idx = 0; iv_idx < NUM_BUFS; iv_idx++) {
                        const uint8_t *srcIv =
                                (const uint8_t *) pIv[i + iv_idx];
                        uint8_t *dstIv = ivs + iv_idx * 32;

                        if (ivLen[i + iv_idx] != 25) {
                                /* 17 plain bytes, then 6 packed bytes
                                 * expanded to 8 */
                                memcpy(dstIv, srcIv, 17);
                                expand_from_6_to_8_bytes(dstIv + 17,
                                                         srcIv + 17);
                        } else {
                                memcpy(dstIv, srcIv, 25);
                        }
                }

                _zuc_eea3_4_buffer(&pKey[i],
                                   ivs,
                                   &pBufferIn[i],
                                   &pBufferOut[i],
                                   &length[i],
                                   256);
        }

        /* Leftover buffers, one at a time; the IV goes in the first slot */
        for (; packetCount != 0; packetCount--, i++) {
                const uint8_t *srcIv = (const uint8_t *) pIv[i];

                if (ivLen[i] != 25) {
                        /* 17 plain bytes, then 6 packed bytes expanded to 8 */
                        memcpy(ivs, srcIv, 17);
                        expand_from_6_to_8_bytes(ivs + 17, srcIv + 17);
                } else {
                        memcpy(ivs, srcIv, 25);
                }

                _zuc_eea3_1_buffer(pKey[i],
                                   ivs,
                                   pBufferIn[i],
                                   pBufferOut[i],
                                   length[i],
                                   256);
        }

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * Rotate the 64-bit value `u` left by `r` bit positions.
 *
 * The rotation count is reduced modulo 64, so r == 0 (and any multiple of
 * 64) returns `u` unchanged. Both shift amounts are kept in the range
 * 0..63 because shifting a 64-bit value by 64 or more is undefined in C.
 */
static inline uint64_t rotate_left(uint64_t u, size_t r)
{
        r &= 63;
        return (u << r) | (u >> ((64 - r) & 63));
}

/*
 * Read a 64-bit value in host byte order from `ptr`.
 *
 * The bytes are copied out with memcpy rather than dereferencing a cast
 * pointer, so `ptr` needs no particular alignment and may point into
 * storage of any type. `ptr` must reference at least 8 readable bytes.
 */
static inline uint64_t load_uint64(const void *ptr)
{
        uint64_t value;

        memcpy(&value, ptr, sizeof(value));
        return value;
}

/*
 * Compute an EIA3 MAC over a single buffer.
 *
 * key_size == 256 selects the ZUC-256 path: the tag accumulator is seeded
 * with the first tag_size bytes of keystream and tag_size bytes are written
 * to pMacI. Any other key_size takes the other (128-bit) path, which writes
 * one 32-bit MAC word to pMacI.
 *
 * pKey          key, handed to the ZUC initialization macro
 * pIv           IV, handed to the ZUC initialization macro
 * pBufferIn     message to authenticate
 * lengthInBits  message length in bits
 * pMacI         destination for the MAC
 * key_size      256 for ZUC-256; callers in this file pass 128 otherwise
 * tag_size      MAC length in bytes. The ZUC-256 path accepts 4, 8 or 16;
 *               for any other value it returns without writing pMacI.
 */
static inline
void _zuc_eia3_1_buffer(const void *pKey,
                        const void *pIv,
                        const void *pBufferIn,
                        const uint32_t lengthInBits,
                        uint32_t *pMacI,
                        const uint32_t key_size,
                        const uint64_t tag_size)
{
        DECLARE_ALIGNED(ZucState_t zucState, 16);
        /* Two 16-byte keystream rounds: current round + look-ahead round */
        DECLARE_ALIGNED(uint32_t keyStream[4 * 2], 64);
        const uint32_t keyStreamLengthInBits = KEYSTR_ROUND_LEN * 8;
        /* Keystream words needed: message bits plus 2 * tag_size bytes */
        uint32_t L = (lengthInBits + 31 + (tag_size << 4)) / ZUC_WORD_BITS;
        uint32_t *pZuc = (uint32_t *) &keyStream[0];
        uint32_t remainingBits = lengthInBits;
        uint32_t T[4] = {0};
        const uint8_t *pIn8 = (const uint8_t *) pBufferIn;
        /* Only read on the non-256 path when finalizing the MAC */
        uint32_t last_key_idx = (L - 1) % 4;

        if (key_size == 256) {
                ASM_ZUC256_INITIALIZATION(pKey, pIv, &(zucState), tag_size);
                /* Initialize the tags with the first 4/8/16 bytes of keystream */
                switch (tag_size)
                {
                case 4:
                        ASM_ZUC_GEN_KEYSTREAM4B(pZuc, &zucState);
                        L -= 1;
                        break;
                case 8:
                        ASM_ZUC_GEN_KEYSTREAM8B(pZuc, &zucState);
                        L -= 2;
                        break;
                case 16:
                        ASM_ZUC_GEN_KEYSTREAM16B(pZuc, &zucState);
                        L -= 4;
                        break;
                default:
                        /* Unsupported tag size: no MAC is produced */
#ifdef SAFE_DATA
                        /* The state was already initialized from the key
                         * above, so wipe it before leaving early */
                        imb_clear_mem(&zucState, sizeof(zucState));
#endif
                        return;
                }
                memcpy(T, pZuc, tag_size);
        } else {
                ASM_ZUC_INITIALIZATION(pKey, pIv, &(zucState));
        }

        ASM_ZUC_GEN_KEYSTREAM16B(pZuc, &zucState);
        /* 4 KS words are generated already */
        L = (L > 4) ? (L - 4) : 0;

        /* loop over the message bits, one 16-byte round at a time */
        while (remainingBits >= keyStreamLengthInBits) {
                remainingBits -=  keyStreamLengthInBits;

                IMB_ASSERT(L > 0);
                /* Generate the next key stream */
                ASM_ZUC_GEN_KEYSTREAM16B(&keyStream[4], &zucState);
                if (L > 4) {
                        L -= 4;
                } else {
                        last_key_idx = L - 1;
                        L = 0;
                }

                ASM_EIA3_ROUND16B(T, keyStream, pIn8, tag_size);
                /* Copy the last keystream generated to the first 16 bytes */
                memcpy(&keyStream[0], &keyStream[4], KEYSTR_ROUND_LEN);
                pIn8 = &pIn8[KEYSTR_ROUND_LEN];
        }

        /* Generate final keystream if needed */
        IMB_ASSERT(L <= 4);
        if (L > 0) {
                ASM_ZUC_GEN_KEYSTREAM(&keyStream[4],
                                      &zucState, L);
                last_key_idx = 4 + L - 1;
        }

        if (key_size == 256) {
                /* Pad the last block with zeros to simplify the computation
                 * of the last block */
                uint32_t remainBytes = (remainingBits + 7) / 8;
                uint8_t pIn8_tmp[16] = {0};
                memcpy(pIn8_tmp, pIn8, remainBytes);
                /* Number of trailing bits in the last byte that are not
                 * part of the message */
                uint32_t clearBits = 8 - (remainingBits % 8);
                if (clearBits == 8)
                        clearBits = 0;
                if (remainBytes != 0) {
                        pIn8_tmp[remainBytes - 1] = ((pIn8_tmp[remainBytes - 1]) >> clearBits)
                                                    << clearBits;
                }
                ASM_EIA3_ROUND16B(T, keyStream, pIn8_tmp, tag_size);

                /*
                 * Final mask: each tag word is XORed with the low 32 bits of
                 * a 64-bit keystream load taken at word (remainingBits / 32)
                 * and rotated left by (remainingBits % 32).
                 */
                uint64_t tag, tag1, tag2, ks1, ks2, ks3, ks4;
                switch (tag_size)
                {
                case 4:
                        T[0] ^= rotate_left(load_uint64(&keyStream[remainingBits / 32]),
                                            remainingBits % 32);
                        /* save the final MAC-I result */
                        *pMacI = bswap4(T[0]);
                        break;
                case 8:
                        tag = T[1];
                        tag = tag << 32 | T[0];
                        ks1 = rotate_left(load_uint64(&keyStream[remainingBits / 32]),
                                          remainingBits % 32);
                        ks2 = rotate_left(load_uint64(&keyStream[remainingBits / 32 + 1]),
                                          remainingBits % 32);
                        /* keep the low word of ks1, move ks2's low word up */
                        ks1 = (ks1 << 32) >> 32;
                        ks2 = ks2 << 32;
                        ks1 = ks1 ^ ks2;
                        tag = tag ^ ks1;
                        /* byte-swap each 32-bit word, keeping word order */
                        tag = BSWAP64(tag);
                        tag = (tag >> 32) | (tag << 32);
                        /* save the final MAC-I result */
                        memcpy(pMacI, &tag, tag_size);
                        break;
                case 16:
                        tag1 = T[1];
                        tag1 = tag1 << 32 | T[0];
                        tag2 = T[3];
                        tag2 = tag2 << 32 | T[2];

                        ks1 = rotate_left(load_uint64(&keyStream[remainingBits / 32]),
                                          remainingBits % 32);
                        ks2 = rotate_left(load_uint64(&keyStream[remainingBits / 32 + 1]),
                                          remainingBits % 32);
                        ks3 = rotate_left(load_uint64(&keyStream[remainingBits / 32 + 2]),
                                          remainingBits % 32);
                        ks4 = rotate_left(load_uint64(&keyStream[remainingBits / 32 + 3]),
                                          remainingBits % 32);
                        ks1 = (ks1 << 32) >> 32;
                        ks2 = ks2 << 32;
                        ks1 = ks1 ^ ks2;
                        tag1 = tag1 ^ ks1;
                        ks3 = (ks3 << 32) >> 32;
                        ks4 = ks4 << 32;
                        ks3 = ks3 ^ ks4;
                        tag2 = tag2 ^ ks3;
                        /* byte-swap each 32-bit word, keeping word order */
                        tag1 = BSWAP64(tag1);
                        tag1 = (tag1 >> 32) | (tag1 << 32);
                        tag2 = BSWAP64(tag2);
                        tag2 = (tag2 >> 32) | (tag2 << 32);
                        /* save the final MAC-I result */
                        memcpy(pMacI, &tag1, 8);
                        memcpy(pMacI + 2, &tag2, 8);
                        break;
                default:
                        break;
                }
        } else {
                T[0] ^= ASM_EIA3_REMAINDER(&keyStream[0], pIn8, remainingBits);
                T[0] ^= rotate_left(load_uint64(&keyStream[remainingBits / 32]),
                                    remainingBits % 32);

                /* save the final MAC-I result */
                uint32_t keyBlock = keyStream[last_key_idx];
                T[0] ^= keyBlock;
                *pMacI = bswap4(T[0]);
        }

#ifdef SAFE_DATA
        /* Clear sensitive data (in registers and stack) */
        imb_clear_mem(keyStream, sizeof(keyStream));
        imb_clear_mem(&zucState, sizeof(zucState));
#endif
}

/*
 * Compute EIA3 MACs over NUM_BUFS buffers.
 *
 * The part of the messages common to all lanes (the shortest length, in
 * whole 16-byte rounds) is processed by the 4-lane ASM_ZUC*_AUTH_4 macro.
 * Each lane is then finished on its own, using a single-packet copy of that
 * lane's state whenever more keystream has to be generated.
 *
 * pKey          NUM_BUFS key pointers
 * ivs           IVs for all lanes, handed to the 4-lane initialization macro
 *               (callers in this file place each IV at a 32-byte stride)
 * pBufferIn     NUM_BUFS message pointers
 * lengthInBits  NUM_BUFS message lengths in bits
 * pMacI         NUM_BUFS MAC destinations
 * job_api       when non-zero, lanes whose job_in_lane entry is NULL are
 *               skipped during per-lane finalization
 * job_in_lane   per-lane markers; only read when job_api is non-zero
 * key_size      256 for ZUC-256; callers in this file pass 128 otherwise
 * tag_size      MAC length in bytes. The ZUC-256 path accepts 4, 8 or 16;
 *               for any other value it returns without writing any MAC.
 */
static inline
void _zuc_eia3_4_buffer(const void * const pKey[NUM_BUFS],
                        const uint8_t *ivs,
                        const void * const pBufferIn[NUM_BUFS],
                        const uint32_t lengthInBits[NUM_BUFS],
                        uint32_t *pMacI[NUM_BUFS],
                        const uint32_t job_api,
                        const void * const job_in_lane[NUM_BUFS],
                        const uint32_t key_size,
                        const uint64_t tag_size)
{
        unsigned int i;
        DECLARE_ALIGNED(ZucState4_t state, 64);
        DECLARE_ALIGNED(ZucState_t singlePktState, 64);
        /* Per lane: current 16-byte keystream round + look-ahead round */
        DECLARE_ALIGNED(uint8_t keyStr[NUM_BUFS][2*KEYSTR_ROUND_LEN], 64);
        /* structure to store the 4 keys */
        DECLARE_ALIGNED(ZucKey4_t keys, 64);
        const uint8_t *pIn8[NUM_BUFS] = {NULL};
        uint32_t remainCommonBits;
        uint32_t numKeyStr = 0;
        /* Tag accumulators; lane i starts at word (i * tag_size / 4) */
        uint32_t T[NUM_BUFS * 4] = {0};
        const uint32_t keyStreamLengthInBits = KEYSTR_ROUND_LEN * 8;
        DECLARE_ALIGNED(uint32_t *pKeyStrArr[NUM_BUFS], 16) = {NULL};

        /* Check if all lengths are equal */
        if ((lengthInBits[0] == lengthInBits[1]) &&
            (lengthInBits[0] == lengthInBits[2]) &&
            (lengthInBits[0] == lengthInBits[3])) {
                remainCommonBits = lengthInBits[0];
        } else {
                /* Calculate the minimum input packet size */
                uint32_t bits1 = (lengthInBits[0] < lengthInBits[1] ?
                                  lengthInBits[0] : lengthInBits[1]);
                uint32_t bits2 = (lengthInBits[2] < lengthInBits[3] ?
                                  lengthInBits[2] : lengthInBits[3]);

                remainCommonBits = (bits1 < bits2) ? bits1 : bits2;
        }

        for (i = 0; i < NUM_BUFS; i++) {
                pIn8[i] = (const uint8_t *) pBufferIn[i];
                pKeyStrArr[i] = (uint32_t *) &keyStr[i][0];
                keys.pKeys[i] = pKey[i];
        }

        if (key_size == 256) {
                ASM_ZUC256_INITIALIZATION_4(&keys, ivs, &state, tag_size);
                /* Initialize the tags with the first 4/8/16 bytes of keystream */
                switch (tag_size)
                {
                case 4:
                        ASM_ZUC_GEN_KEYSTREAM4B_4(&state, pKeyStrArr);
                        break;
                case 8:
                        ASM_ZUC_GEN_KEYSTREAM8B_4(&state, pKeyStrArr);
                        break;
                case 16:
                        ASM_ZUC_GEN_KEYSTREAM16B_4(&state, pKeyStrArr);
                        break;
                default:
                        /* Unsupported tag size: no MAC is produced */
#ifdef SAFE_DATA
                        /* The state was already initialized from the keys
                         * above, so wipe it before leaving early */
                        imb_clear_mem(&state, sizeof(state));
                        imb_clear_mem(&keys, sizeof(keys));
#endif
                        return;
                }
                for (i = 0; i < NUM_BUFS; i++)
                        memcpy(&T[i * tag_size / 4], pKeyStrArr[i], tag_size);
        } else {
                ASM_ZUC_INITIALIZATION_4(&keys, ivs, &state);
        }

        /* Whole 16-byte rounds shared by all lanes */
        numKeyStr = remainCommonBits / keyStreamLengthInBits;
        if (key_size == 256) {
                ASM_ZUC256_AUTH_4(&state, T, pIn8, numKeyStr, pKeyStrArr, tag_size);
        } else  {
                ASM_ZUC_AUTH_4(&state, T, pIn8, numKeyStr, pKeyStrArr);
        }
        remainCommonBits = remainCommonBits % keyStreamLengthInBits;

        /* Point at the next 16 bytes of the key */
        for (i = 0; i < NUM_BUFS; i++)
                pKeyStrArr[i] = (uint32_t *) &keyStr[i][KEYSTR_ROUND_LEN];

        /* Process each packet separately for the remaining bits */
        for (i = 0; i < NUM_BUFS; i++) {
                if (job_api && (job_in_lane[i] == NULL))
                        continue;

                uint32_t remainBits = lengthInBits[i] - numKeyStr*keyStreamLengthInBits;
                uint32_t *keyStr32 = (uint32_t *) keyStr[i];
                /* Keystream bits still needed: message plus 2 * tag_size bytes */
                uint32_t N = remainBits + ((uint32_t) tag_size << 4);

                /* For zuc256, first tag_sz words have been generated to initialize tags */
                if (key_size == 256) {
                        N -= tag_size << 3;
                }
                uint32_t L = ((N + 31) / ZUC_WORD_BITS);
                /* Only read on the non-256 path when finalizing the MAC */
                uint32_t last_key_idx = (L - 1) % 4;
                /* 4 KS words are generated already */
                L = (L > 4) ? (L - 4) : 0;


                /* If remaining bits are more than 4 bytes, we need to generate
                 * at least 4B more of keystream, so we need to copy
                 * the zuc state to single packet state first
                 */
                if (L > 0) {
                        unsigned int k;

                        /* Gather lane i of the 16 LFSR registers */
                        for (k = 0; k < 16; k++)
                                singlePktState.lfsrState[k] =
                                        state.lfsrState[k][i];

                        singlePktState.fR1 = state.fR1[i];
                        singlePktState.fR2 = state.fR2[i];
                }

                while (remainBits >= keyStreamLengthInBits) {
                        remainBits -= keyStreamLengthInBits;
                        IMB_ASSERT(L > 0);
                        /* Generate the next key stream 4 bytes or 16 bytes */
                        if (L > 4) {
                                ASM_ZUC_GEN_KEYSTREAM16B(&keyStr32[4],
                                                         &singlePktState);
                                L -= 4;
                        } else {
                                ASM_ZUC_GEN_KEYSTREAM(&keyStr32[4],
                                                      &singlePktState, L);
                                last_key_idx = L - 1;
                                L = 0;
                        }
                        ASM_EIA3_ROUND16B(&T[i * tag_size / 4], keyStr32,
                                          pIn8[i], tag_size);
                        /* Copy the last keystream generated
                         * to the first 16 bytes */
                        memcpy(keyStr32, &keyStr32[4], KEYSTR_ROUND_LEN);
                        pIn8[i] = &pIn8[i][KEYSTR_ROUND_LEN];
                }

                /* Generate final keystream if needed */
                IMB_ASSERT(L <= 4);
                if (L > 0) {
                        ASM_ZUC_GEN_KEYSTREAM(&keyStr32[4],
                                              &singlePktState, L);
                        last_key_idx = 4 + L - 1;
                }

                if (key_size == 256) {
                        /* Pad the last block with zeros to simplify the
                         * computation of the last block */
                        uint32_t remainBytes = (remainBits + 7) / 8;
                        uint8_t pIn8_tmp[16] = {0};
                        memcpy(pIn8_tmp, pIn8[i], remainBytes);
                        /* Number of trailing bits in the last byte that are
                         * not part of the message */
                        uint32_t clearBits = 8 - (remainBits % 8);
                        if (clearBits == 8)
                                clearBits = 0;
                        if (remainBytes != 0) {
                                pIn8_tmp[remainBytes - 1] = (pIn8_tmp[remainBytes - 1]
                                                             >> clearBits)
                                                            << clearBits;
                        }
                        ASM_EIA3_ROUND16B(&T[i * tag_size / 4], keyStr32,
                                          pIn8_tmp, tag_size);

                        /*
                         * Final mask: each tag word is XORed with the low
                         * 32 bits of a 64-bit keystream load taken at word
                         * (remainBits / 32) and rotated left by
                         * (remainBits % 32).
                         */
                        uint64_t tag, tag1, tag2, ks1, ks2, ks3, ks4;
                        switch (tag_size)
                        {
                        case 4:
                                T[i * tag_size / 4] ^= rotate_left(
                                                        load_uint64(&keyStr32[remainBits / 32]),
                                                        remainBits % 32);
                                /* save the final MAC-I result */
                                *(pMacI[i]) = bswap4(T[i * tag_size / 4]);
                                break;
                        case 8:
                                tag = T[i * tag_size / 4 + 1];
                                tag = tag << 32 | T[i * tag_size / 4];

                                ks1 = rotate_left(
                                       load_uint64(&keyStr32[remainBits / 32]),
                                       remainBits % 32);
                                ks2 = rotate_left(
                                       load_uint64(&keyStr32[remainBits / 32 + 1]),
                                       remainBits % 32);
                                /* keep the low word of ks1, move ks2's low
                                 * word up */
                                ks1 = (ks1 << 32) >> 32;
                                ks2 = ks2 << 32;
                                ks1 = ks1 ^ ks2;
                                tag = tag ^ ks1;
                                /* byte-swap each 32-bit word, keeping word
                                 * order */
                                tag = BSWAP64(tag);
                                tag = (tag >> 32) | (tag << 32);
                                /* save the final MAC-I result */
                                memcpy(pMacI[i], &tag, tag_size);
                                break;
                        case 16:
                                tag1 = T[i * tag_size / 4 + 1];
                                tag1 = tag1 << 32 | T[i * tag_size / 4 ];
                                tag2 = T[i * tag_size / 4 + 3];
                                tag2 = tag2 << 32 | T[i * tag_size / 4 + 2];

                                ks1 = rotate_left(
                                       load_uint64(&keyStr32[remainBits / 32]),
                                       remainBits % 32);
                                ks2 = rotate_left(
                                       load_uint64(&keyStr32[remainBits / 32 + 1]),
                                       remainBits % 32);
                                ks3 = rotate_left(
                                       load_uint64(&keyStr32[remainBits / 32 + 2]),
                                       remainBits % 32);
                                ks4 = rotate_left(
                                       load_uint64(&keyStr32[remainBits / 32 + 3]),
                                       remainBits % 32);
                                ks1 = (ks1 << 32) >> 32;
                                ks2 = ks2 << 32;
                                ks1 = ks1 ^ ks2;
                                tag1 = tag1 ^ ks1;
                                ks3 = (ks3 << 32) >> 32;
                                ks4 = ks4 << 32;
                                ks3 = ks3 ^ ks4;
                                tag2 = tag2 ^ ks3;
                                /* byte-swap each 32-bit word, keeping word
                                 * order */
                                tag1 = BSWAP64(tag1);
                                tag1 = (tag1 >> 32) | (tag1 << 32);
                                tag2 = BSWAP64(tag2);
                                tag2 = (tag2 >> 32) | (tag2 << 32);
                                /* save the final MAC-I result */
                                memcpy(pMacI[i], &tag1, 8);
                                memcpy(pMacI[i] + 2, &tag2, 8);
                                break;
                        default:
                                break;
                        }
                } else {
                        uint32_t keyBlock = keyStr32[last_key_idx];
                        T[i] ^= ASM_EIA3_REMAINDER(keyStr32, pIn8[i], remainBits);
                        T[i] ^= rotate_left(load_uint64(&keyStr32[remainBits / 32]),
                                            remainBits % 32);
                        /* save the final MAC-I result */
                        *(pMacI[i]) = bswap4(T[i] ^ keyBlock);
                }
        }

#ifdef SAFE_DATA
        /* Clear sensitive data (in registers and stack) */
        imb_clear_mem(keyStr, sizeof(keyStr));
        imb_clear_mem(&singlePktState, sizeof(singlePktState));
        imb_clear_mem(&state, sizeof(state));
        imb_clear_mem(&keys, sizeof(keys));
#endif
}

/*
 * Compute 128-bit-key EIA3 MACs (4-byte tag) over numBuffers buffers.
 *
 * Buffers are consumed four at a time through _zuc_eia3_4_buffer; whatever
 * is left over (0 to 3 buffers) goes through _zuc_eia3_1_buffer one by one.
 *
 * pKey          array of numBuffers key pointers
 * pIv           array of numBuffers IV pointers (16 bytes are read from each
 *               on the 4-buffer path)
 * pBufferIn     array of numBuffers message pointers
 * lengthInBits  array of numBuffers message lengths in bits
 * pMacI         array of numBuffers MAC destinations
 * numBuffers    number of entries in each of the arrays above
 */
static inline
void _zuc_eia3_n_buffer(const void * const pKey[],
                        const void * const pIv[],
                        const void * const pBufferIn[],
                        const uint32_t lengthInBits[],
                        uint32_t *pMacI[],
                        const uint32_t numBuffers)
{
        unsigned int done = 0;
        unsigned int left = numBuffers;

        /* Full groups of four buffers */
        for (; left >= 4; left -= 4, done += 4) {
                /* The four IVs, one per 32-byte slot */
                DECLARE_ALIGNED(uint8_t ivs[NUM_BUFS*32], 16);
                unsigned int lane;

                for (lane = 0; lane < 4; lane++)
                        memcpy(&ivs[lane * 32], pIv[done + lane], 16);

                _zuc_eia3_4_buffer(&pKey[done], ivs, &pBufferIn[done],
                                   &lengthInBits[done], &pMacI[done],
                                   0, NULL, 128, 4);
        }

        /* Remaining buffers, one at a time */
        for (; left != 0; left--, done++)
                _zuc_eia3_1_buffer(pKey[done], pIv[done], pBufferIn[done],
                                   lengthInBits[done], pMacI[done], 128, 4);
}

/*
 * Public single-buffer EIA3 entry point (128-bit key, 4-byte MAC).
 *
 * With SAFE_PARAM the arguments are validated first; the first failing check
 * sets the matching error through imb_set_errno() and the function returns
 * without touching pMacI. With SAFE_DATA the scratch registers are cleared
 * before returning.
 *
 * pKey          key
 * pIv           IV
 * pBufferIn     message to authenticate
 * lengthInBits  message length in bits
 * pMacI         destination for the 32-bit MAC
 */
void ZUC_EIA3_1_BUFFER(const void *pKey,
                       const void *pIv,
                       const void *pBufferIn,
                       const uint32_t lengthInBits,
                       uint32_t *pMacI)
{
#ifdef SAFE_PARAM
        int errnum = 0;
        int rejected = 1;

        /* Start from a clean error state */
        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* NULL pointers first, then the supported length range */
        if (pKey == NULL)
                errnum = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                errnum = IMB_ERR_NULL_IV;
        else if (pBufferIn == NULL)
                errnum = IMB_ERR_NULL_SRC;
        else if (pMacI == NULL)
                errnum = IMB_ERR_NULL_AUTH;
        else if (lengthInBits < ZUC_MIN_BITLEN ||
                 lengthInBits > ZUC_MAX_BITLEN)
                errnum = IMB_ERR_AUTH_LEN;
        else
                rejected = 0;

        if (rejected) {
                imb_set_errno(NULL, errnum);
                return;
        }
#endif

        _zuc_eia3_1_buffer(pKey, pIv, pBufferIn, lengthInBits, pMacI, 128, 4);

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * Public four-buffer EIA3 entry point (128-bit key, 4-byte MAC).
 *
 * With SAFE_PARAM the array pointers and then every per-buffer entry are
 * validated; the first failing check sets the matching error through
 * imb_set_errno() and the function returns without computing anything.
 * The first 16 bytes of each IV are copied into a local buffer, one IV per
 * 32-byte slot, before the 4-lane routine is called.
 *
 * pKey          NUM_BUFS key pointers
 * pIv           NUM_BUFS IV pointers
 * pBufferIn     NUM_BUFS message pointers
 * lengthInBits  NUM_BUFS message lengths in bits
 * pMacI         NUM_BUFS destinations for the 32-bit MACs
 */
void ZUC_EIA3_4_BUFFER(const void * const pKey[NUM_BUFS],
                       const void * const pIv[NUM_BUFS],
                       const void * const pBufferIn[NUM_BUFS],
                       const uint32_t lengthInBits[NUM_BUFS],
                       uint32_t *pMacI[NUM_BUFS])
{
#ifdef SAFE_PARAM
        unsigned int i;
        int errnum = 0;
        int rejected = 1;

        /* Start from a clean error state */
        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* The arrays themselves must be present */
        if (pKey == NULL)
                errnum = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                errnum = IMB_ERR_NULL_IV;
        else if (pBufferIn == NULL)
                errnum = IMB_ERR_NULL_SRC;
        else if (pMacI == NULL)
                errnum = IMB_ERR_NULL_AUTH;
        else if (lengthInBits == NULL)
                errnum = IMB_ERR_AUTH_LEN;
        else
                rejected = 0;

        if (rejected) {
                imb_set_errno(NULL, errnum);
                return;
        }

        /* Then every entry: pointers first, length range last */
        for (i = 0; i < NUM_BUFS; i++) {
                if (pKey[i] == NULL)
                        errnum = IMB_ERR_NULL_KEY;
                else if (pIv[i] == NULL)
                        errnum = IMB_ERR_NULL_IV;
                else if (pBufferIn[i] == NULL)
                        errnum = IMB_ERR_NULL_SRC;
                else if (pMacI[i] == NULL)
                        errnum = IMB_ERR_NULL_AUTH;
                else if (lengthInBits[i] < ZUC_MIN_BITLEN ||
                         lengthInBits[i] > ZUC_MAX_BITLEN)
                        errnum = IMB_ERR_AUTH_LEN;
                else
                        continue;

                imb_set_errno(NULL, errnum);
                return;
        }
#endif

        /* The four IVs, one per 32-byte slot */
        DECLARE_ALIGNED(uint8_t ivs[NUM_BUFS*32], 16);
        unsigned int lane;

        for (lane = 0; lane < NUM_BUFS; lane++)
                memcpy(&ivs[lane * 32], pIv[lane], 16);

        _zuc_eia3_4_buffer(pKey, ivs, pBufferIn, lengthInBits,
                           pMacI, 0, NULL, 128, 4);

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * Public N-buffer EIA3 entry point (128-bit key, 4-byte MAC).
 *
 * With SAFE_PARAM the array pointers and then each of the numBuffers entries
 * are validated; the first failing check sets the matching error through
 * imb_set_errno() and the function returns without computing anything.
 * The work itself is delegated to _zuc_eia3_n_buffer().
 *
 * pKey          array of numBuffers key pointers
 * pIv           array of numBuffers IV pointers
 * pBufferIn     array of numBuffers message pointers
 * lengthInBits  array of numBuffers message lengths in bits
 * pMacI         array of numBuffers destinations for the 32-bit MACs
 * numBuffers    number of buffers to process
 */
void ZUC_EIA3_N_BUFFER(const void * const pKey[],
                       const void * const pIv[],
                       const void * const pBufferIn[],
                       const uint32_t lengthInBits[],
                       uint32_t *pMacI[],
                       const uint32_t numBuffers)
{
#ifdef SAFE_PARAM
        unsigned int i;
        int errnum = 0;
        int rejected = 1;

        /* Start from a clean error state */
        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* The arrays themselves must be present */
        if (pKey == NULL)
                errnum = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                errnum = IMB_ERR_NULL_IV;
        else if (pBufferIn == NULL)
                errnum = IMB_ERR_NULL_SRC;
        else if (pMacI == NULL)
                errnum = IMB_ERR_NULL_AUTH;
        else if (lengthInBits == NULL)
                errnum = IMB_ERR_AUTH_LEN;
        else
                rejected = 0;

        if (rejected) {
                imb_set_errno(NULL, errnum);
                return;
        }

        /* Then every entry: pointers first, length range last */
        for (i = 0; i < numBuffers; i++) {
                if (pKey[i] == NULL)
                        errnum = IMB_ERR_NULL_KEY;
                else if (pIv[i] == NULL)
                        errnum = IMB_ERR_NULL_IV;
                else if (pBufferIn[i] == NULL)
                        errnum = IMB_ERR_NULL_SRC;
                else if (pMacI[i] == NULL)
                        errnum = IMB_ERR_NULL_AUTH;
                else if (lengthInBits[i] < ZUC_MIN_BITLEN ||
                         lengthInBits[i] > ZUC_MAX_BITLEN)
                        errnum = IMB_ERR_AUTH_LEN;
                else
                        continue;

                imb_set_errno(NULL, errnum);
                return;
        }
#endif

        _zuc_eia3_n_buffer(pKey, pIv, pBufferIn, lengthInBits,
                           pMacI, numBuffers);

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * Public single-buffer ZUC-256 EIA3 entry point.
 *
 * The IV is copied into a local 32-byte buffer before use: a 25-byte IV is
 * copied as is, otherwise the first 17 bytes are copied and the following
 * 6 bytes are expanded to 8 by expand_from_6_to_8_bytes().
 *
 * With SAFE_PARAM the arguments are validated first; the first failing check
 * sets the matching error through imb_set_errno() and the function returns
 * without touching pMacI. ivLen is checked against the same two values
 * (23 and 25) that ZUC256_EIA3_N_BUFFER accepts, so an unexpected length is
 * reported instead of being silently treated as a 23-byte IV.
 *
 * pKey          key
 * pIv           IV of ivLen bytes
 * ivLen         IV length in bytes: 23 or 25
 * pBufferIn     message to authenticate
 * lengthInBits  message length in bits
 * pMacI         destination for the MAC (tag_size bytes are written)
 * tag_size      MAC length in bytes: 4, 8 or 16
 */
void ZUC256_EIA3_1_BUFFER(const void *pKey,
                          const void *pIv,
                          const uint32_t ivLen,
                          const void *pBufferIn,
                          const uint32_t lengthInBits,
                          uint32_t *pMacI,
                          const uint64_t tag_size)
{
#ifdef SAFE_PARAM
        if (imb_errno != 0)
                imb_set_errno(NULL, 0);
        /* Check for NULL pointers */
        if (pKey == NULL) {
                imb_set_errno(NULL, IMB_ERR_NULL_KEY);
                return;
        }

        if (pIv == NULL) {
                imb_set_errno(NULL, IMB_ERR_NULL_IV);
                return;
        }

        /* Only 23-byte and 25-byte IVs are handled below */
        if (ivLen != 23 && ivLen != 25) {
                imb_set_errno(NULL, IMB_ERR_IV_LEN);
                return;
        }

        if (pBufferIn == NULL) {
                imb_set_errno(NULL, IMB_ERR_NULL_SRC);
                return;
        }

        if (pMacI == NULL) {
                imb_set_errno(NULL, IMB_ERR_NULL_AUTH);
                return;
        }

        /* Check input data is in range of supported length */
        if (lengthInBits < ZUC_MIN_BITLEN || lengthInBits > ZUC_MAX_BITLEN) {
                imb_set_errno(NULL, IMB_ERR_AUTH_LEN);
                return;
        }
        if (tag_size != 4 && tag_size != 8 && tag_size != 16) {
                imb_set_errno(NULL, IMB_ERR_AUTH_TAG_LEN);
                return;
        }
#endif

        uint8_t iv[32];
        if (ivLen == 25) {
                memcpy(iv, pIv, 25);
        } else {
                /* copy first 17 bytes */
                memcpy(iv, pIv, 17);
                /* expand next 6 bytes to 8 bytes */
                expand_from_6_to_8_bytes(iv + 17, (const uint8_t *)pIv + 17);
        }

        _zuc_eia3_1_buffer(pKey, iv, pBufferIn, lengthInBits,
                           pMacI, 256, tag_size);

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * Public N-buffer ZUC-256 EIA3 entry point.
 *
 * Buffers are consumed four at a time through _zuc_eia3_4_buffer; whatever
 * is left over (0 to 3 buffers) goes through _zuc_eia3_1_buffer one by one.
 * Each IV is first copied into a local 32-byte slot: a 25-byte IV is copied
 * as is, otherwise the first 17 bytes are copied and the following 6 bytes
 * are expanded to 8 by expand_from_6_to_8_bytes().
 *
 * With SAFE_PARAM the array pointers, every per-buffer entry and finally
 * tag_size are validated; the first failing check sets the matching error
 * through imb_set_errno() and the function returns without computing
 * anything.
 *
 * pKey          array of numBuffers key pointers
 * pIv           array of numBuffers IV pointers
 * ivLen         array of numBuffers IV lengths in bytes (23 or 25)
 * pBufferIn     array of numBuffers message pointers
 * lengthInBits  array of numBuffers message lengths in bits
 * pMacI         array of numBuffers MAC destinations
 * tag_size      MAC length in bytes: 4, 8 or 16
 * numBuffers    number of buffers to process
 */
void ZUC256_EIA3_N_BUFFER(const void * const pKey[],
                          const void * const pIv[],
                          const uint32_t ivLen[],
                          const void * const pBufferIn[],
                          const uint32_t lengthInBits[],
                          uint32_t *pMacI[],
                          const uint64_t tag_size,
                          const uint32_t numBuffers)
{
        unsigned int i, iv_idx;
        unsigned int packetCount = numBuffers;
        uint8_t ivs[32 * NUM_BUFS];

#ifdef SAFE_PARAM
        int errnum = 0;
        int rejected = 1;

        /* Start from a clean error state */
        if (imb_errno != 0)
                imb_set_errno(NULL, 0);

        /* The arrays themselves must be present */
        if (pKey == NULL)
                errnum = IMB_ERR_NULL_KEY;
        else if (pIv == NULL)
                errnum = IMB_ERR_NULL_IV;
        else if (ivLen == NULL)
                errnum = IMB_ERR_IV_LEN;
        else if (pBufferIn == NULL)
                errnum = IMB_ERR_NULL_SRC;
        else if (pMacI == NULL)
                errnum = IMB_ERR_NULL_AUTH;
        else if (lengthInBits == NULL)
                errnum = IMB_ERR_AUTH_LEN;
        else
                rejected = 0;

        if (rejected) {
                imb_set_errno(NULL, errnum);
                return;
        }

        /* Then every entry: pointers, IV length, message length range */
        for (i = 0; i < numBuffers; i++) {
                if (pKey[i] == NULL)
                        errnum = IMB_ERR_NULL_KEY;
                else if (pIv[i] == NULL)
                        errnum = IMB_ERR_NULL_IV;
                else if (ivLen[i] != 23 && ivLen[i] != 25)
                        errnum = IMB_ERR_IV_LEN;
                else if (pBufferIn[i] == NULL)
                        errnum = IMB_ERR_NULL_SRC;
                else if (pMacI[i] == NULL)
                        errnum = IMB_ERR_NULL_AUTH;
                else if (lengthInBits[i] < ZUC_MIN_BITLEN ||
                         lengthInBits[i] > ZUC_MAX_BITLEN)
                        errnum = IMB_ERR_AUTH_LEN;
                else
                        continue;

                imb_set_errno(NULL, errnum);
                return;
        }

        /* The tag length is checked last */
        if (!(tag_size == 4 || tag_size == 8 || tag_size == 16)) {
                imb_set_errno(NULL, IMB_ERR_AUTH_TAG_LEN);
                return;
        }
#endif

        /* Full groups of four buffers */
        for (i = 0; packetCount >= 4; packetCount -= 4, i += 4) {
                for (iv_idx = 0; iv_idx < 4; iv_idx++) {
                        uint8_t *slot = ivs + iv_idx * 32;
                        const uint8_t *src = (const uint8_t *) pIv[i + iv_idx];

                        if (ivLen[i + iv_idx] != 25) {
                                /* first 17 bytes as is, next 6 bytes
                                 * expanded to 8 */
                                memcpy(slot, src, 17);
                                expand_from_6_to_8_bytes(slot + 17, src + 17);
                        } else {
                                memcpy(slot, src, 25);
                        }
                }

                _zuc_eia3_4_buffer(&pKey[i], ivs, &pBufferIn[i],
                                   &lengthInBits[i], &pMacI[i],
                                   0, NULL, 256, tag_size);
        }

        /* Remaining buffers, one at a time, reusing the first IV slot */
        for (; packetCount != 0; packetCount--, i++) {
                const uint8_t *src = (const uint8_t *) pIv[i];

                if (ivLen[i] != 25) {
                        /* first 17 bytes as is, next 6 bytes expanded to 8 */
                        memcpy(ivs, src, 17);
                        expand_from_6_to_8_bytes(ivs + 17, src + 17);
                } else {
                        memcpy(ivs, src, 25);
                }

                _zuc_eia3_1_buffer(pKey[i], ivs, pBufferIn[i], lengthInBits[i],
                                   pMacI[i], 256, tag_size);
        }

#ifdef SAFE_DATA
        /* Clear sensitive data in registers */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * ZUC_EIA3_4_BUFFER_JOB
 *
 * Thin entry point that hands NUM_BUFS buffers to _zuc_eia3_4_buffer()
 * together with the caller-provided job_in_lane array.
 *
 * pKey         - per-buffer key pointers
 * ivs          - IV data, forwarded untouched
 * pBufferIn    - per-buffer input pointers
 * pMacI        - per-buffer MAC output pointers
 * lengthInBits - per-buffer message lengths, in bits
 * job_in_lane  - per-lane job pointers, forwarded untouched
 *
 * No argument validation is performed here; everything is passed straight
 * through. Note that the helper takes lengthInBits *before* pMacI, which is
 * the reverse of this function's own parameter order.
 */
void ZUC_EIA3_4_BUFFER_JOB(const void * const pKey[NUM_BUFS],
                           const uint8_t *ivs,
                           const void * const pBufferIn[NUM_BUFS],
                           uint32_t *pMacI[NUM_BUFS],
                           const uint32_t lengthInBits[NUM_BUFS],
                           const void * const job_in_lane[NUM_BUFS])
{
        /*
         * Fixed arguments for this entry point.
         * NOTE(review): the helper's prototype is not visible here; these
         * are presumably a "job API" flag, the key size in bits and the
         * tag size in bytes - confirm against _zuc_eia3_4_buffer().
         */
        enum {
                EIA3_JOB_MODE_ON = 1,
                EIA3_KEY_BITS_128 = 128,
                EIA3_TAG_BYTES_4 = 4
        };

        _zuc_eia3_4_buffer(pKey, ivs, pBufferIn, lengthInBits, pMacI,
                           EIA3_JOB_MODE_ON, job_in_lane,
                           EIA3_KEY_BITS_128, EIA3_TAG_BYTES_4);

#ifdef SAFE_DATA
        /* SAFE_DATA builds: scrub scratch GP and SIMD registers on exit */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}

/*
 * ZUC256_EIA3_4_BUFFER_JOB
 *
 * Thin entry point that hands NUM_BUFS buffers to _zuc_eia3_4_buffer()
 * together with the caller-provided job_in_lane array and tag size.
 *
 * pKey         - per-buffer key pointers
 * ivs          - IV data, forwarded untouched
 * pBufferIn    - per-buffer input pointers
 * pMacI        - per-buffer MAC output pointers
 * lengthInBits - per-buffer message lengths, in bits
 * job_in_lane  - per-lane job pointers, forwarded untouched
 * tag_size     - forwarded unchanged as the helper's last argument
 *
 * No argument validation is performed here (tag_size included); everything
 * is passed straight through. Note that the helper takes lengthInBits
 * *before* pMacI, which is the reverse of this function's own parameter
 * order.
 */
void ZUC256_EIA3_4_BUFFER_JOB(const void * const pKey[NUM_BUFS],
                              const uint8_t *ivs,
                              const void * const pBufferIn[NUM_BUFS],
                              uint32_t *pMacI[NUM_BUFS],
                              const uint32_t lengthInBits[NUM_BUFS],
                              const void * const job_in_lane[NUM_BUFS],
                              const uint64_t tag_size)
{
        /*
         * Fixed arguments for this entry point.
         * NOTE(review): the helper's prototype is not visible here; these
         * are presumably a "job API" flag and the key size in bits -
         * confirm against _zuc_eia3_4_buffer().
         */
        enum {
                EIA3_JOB_MODE_ON = 1,
                EIA3_KEY_BITS_256 = 256
        };

        _zuc_eia3_4_buffer(pKey, ivs, pBufferIn, lengthInBits, pMacI,
                           EIA3_JOB_MODE_ON, job_in_lane,
                           EIA3_KEY_BITS_256, tag_size);

#ifdef SAFE_DATA
        /* SAFE_DATA builds: scrub scratch GP and SIMD registers on exit */
        CLEAR_SCRATCH_GPS();
        CLEAR_SCRATCH_SIMD_REGS();
#endif
}